# coding:utf8
import findspark

findspark.init()

from pyspark.sql import SparkSession

if __name__ == '__main__':
    # Local import: only this script entry point needs environment access.
    import os

    # 0. Build the SparkSession, the entry point of the execution environment.
    #    Runs locally on all available cores with 2 shuffle partitions.
    spark = (
        SparkSession.builder
        .appName("test")
        .master("local[*]")
        .config("spark.sql.shuffle.partitions", 2)
        .getOrCreate()
    )

    try:
        # 1. Read the MySQL table `movie_data` through the JDBC data source.
        #    The result is a DataFrame (not an RDD).
        #    Credentials can be overridden through MYSQL_USER / MYSQL_PASSWORD.
        # NOTE(review): the fallback values below keep the previous behaviour,
        # but a password committed to source control should be rotated and
        # supplied only through the environment.
        movie_df = (
            spark.read.format("jdbc")
            .option("url", "jdbc:mysql://bigdata:3306/pyspark?useSSL=false&useUnicode=true")
            .option("dbtable", "movie_data")
            .option("user", os.environ.get("MYSQL_USER", "root"))
            .option("password", os.environ.get("MYSQL_PASSWORD", "hry110@qq.com"))
            .load()
        )

        # Print the inferred schema, then up to 100 rows with long cell
        # values truncated.
        movie_df.printSchema()
        movie_df.show(100, True)
    finally:
        # Always release the SparkSession (and its underlying SparkContext),
        # even when the JDBC read or the display step fails.
        spark.stop()
